/******************************************************************************************************
 *______________________________________Combine Depth and Volume______________________________________*
 ******************************************************************************************************/

/*
PURPOSE:

  This script merges the output from Volume_Interval.sas and Depth_Interval.sas
  for the year 2015. Afterwards, it computes the volume shares and depth shares for
  the top 5 maker-taker exchanges and the top 8 exchanges.

PRIMARY INPUT:

  "Symbol_Universe_2015" is the output of the Sample Selection process. It is a symbol-level dataset
  containing symbol characteristics, average daily volume over 2015, sample indicators, and more.
  The key variables we extract from this dataset are f_Sample, which identifies the sample used
  in the paper, f_Sample_T100, which identifies the top 100 symbols in the sample, and f_NYSE_Listed,
  which allows us to separate the analysis for NYSE-listed and non-NYSE-listed symbols.

  "Volume_2015" is the output of the "Volume.sas". This is a dataset containing
  the volume traded on a particular symbol-date-exchange combination (e.g. the volume traded on NYSE
  for SPY on March 8, 2015). It contains an observation for any symbol-date-exchange combination
  with non-zero volume.

  "Depth_2015" is the output of the "Depth.sas". This is a dataset containing
  the depth of an exchange on a particular symbol-date (e.g. the depth traded on NYSE
  for SPY on March 8, 2015). For the definition of depth we use, look at "Depth.sas".
  This dataset contains an observation for all symbol-dates with valid quoting and for all Top 8
  exchanges even if they did not quote for a particular symbol-date.

APPROACH:

  This macro loads the symbol universe file, the depth interval dataset,
  and the volume interval dataset. From the symbol universe file it extracts
  the list of symbols in the sample used in the paper.

  Then it merges the volume dataset with the depth dataset. It merges the combined dataset
  with the list of sample symbols, which drops all symbols outside of this sample.
  Then it computes the volume and depth shares among the Top 8 exchanges
  and the top 5 maker-taker exchanges.

  The output is symbol-date(interval)-exchange level dataset with volume shares and depth shares
  for the symbols in the sample.

SUPPLEMENTAL INPUT FILES:

    "Server_Directories.xlsx"

PREREQ:

    Before running this file, verify that you have completed the "Sample Selection" Procedure,
    and have run "Volume_Interval.sas" and "Depth_Interval.sas".

    Also verify that you have entered the path to your input directory where it reads:
    "Import Input Parameters".

INSTRUCTION:

    Scroll to the "GLOBAL VARIABLES" section. Verify that "year_input" is set to 2015.
    Then run from the SAS Studio website.
*/

/*****************************************************************************************
 *_______________________________________INITIALIZE______________________________________*
 *****************************************************************************************/

/*
 * WRDS LIBRARY:
 *
 * This sets up the wharton library, which allows us to access the CRSP and TAQ datasets.
 */
%include '/wrds/lib/utility/wrdslib.sas';
options mprint;

/*
 * IMPORT INPUT PARAMETERS:
 *
 * Make sure to enter the path to your input directory below.
 */
proc import datafile="/home/uchicago/eche/sasuser.v94/WRDS/Input/Server_Directories"
  out= server_directories
  dbms = xlsx
  replace;
run;

data directory_import;
  set server_directories;

  if directory = "user_directory" then do; call symput('userdir', trim(path)); end;
  if directory = "volume_directory" then do; call symput('voldir', trim(path)); end;
  if directory = "temp_directory" then do; call symput('tempdir', trim(path)); end;
run;

/**
 * Directories:
 *
 * Sets up the directories according to the paths in the "Server_Directories.xlsx" file.
 */
libname userdir &userdir;
libname voldir &voldir;
libname tempdir &tempdir;

/* Set user_path for import/export
 * We need to dequote the path passed in from "Server_Directories.xlsx"
 */
%let user_path  = %qsysfunc(dequote(&userdir));


/**********************************************************************************************
 *______________________________________GLOBAL VARIABLES______________________________________*
 **********************************************************************************************/

/* This variable choose the year for which we combine depth and volume */
%let year_input = 2015;

/***********************************************************************************
 *______________________________________MACRO______________________________________*
 ***********************************************************************************/

/*
 * _MACRO_ : Combine_Depth_Volume(YYYY)
 *
 * PARAM: YYYY: The year for which this macro calculates symbol-date-exchange volume shares
 *
 * This Macro loads the symbol universe file and extracts the symbols in Sample 1.
 * Then this macro loads the symbol-date-exchange level volume and depth data.
 * We merge the volume onto the depth, and then the depth-volume data onto the symbol universe
 * file for Sample 1 symbols only.
 *
 * Then we calculate depth and volume shares among the top 8 and the top 5 exchanges.
 *
 * OUTPUT: Depth_Volume_Shares_&YYYY..csv, symbol-date(interval)-exchange level dataset with volume, depth,
 *  volume shares, and depth shares
 */

%Macro Combine_Depth_Volume(YYYY);
  /*
   * INPUT: tempdir.symbol_universe_2015, symbol-level dataset containing sample indicators
   *
   * Loads the symbol universe dataset
   *
   * OUTPUT: symbol_universe, symbol-level dataset containing sample indicators
   */
  data symbol_universe;
    set tempdir.symbol_universe_&YYYY.;
  run;

  /*
   * INPUT: symbol_universe, symbol-level dataset containing sample indicators
   *
   * Restricts the symbol universe dataset to symbols the sample in our paper
   *
   * OUTPUT: symbol_sample, list of symbols in the symbol sample with sample indicators
   */
  data symbol_sample;
    set symbol_universe;
    symbol = trim(sym_root) || trim(sym_suffix);

    /* Sort the sample by symbol */
    proc sort;
      by symbol sym_root sym_suffix;
  run;

  /*
   * INPUT: Depth_&YYYY:, date-symbol-interval-exchange level depth dataset
   *
   * Loads the depth dataset.
   * For every observation, we assign flags if the exchange is in the Top 8 exchanges
   * or if additionally the exchange is one of the top 5 maker-taker exchanges.
   *
   * If the Depth_Interval files were generated monthly, this data step will load all the monthly depth
   * datasets into a single annual dataset.
   *
   * OUTPUT: depth, date-symbol-interval-exchange level depth dataset with sample indicators
   */
  data depth_top5;
    set tempdir.depth_top5_&YYYY.;
    format date yymmdd10.;


    /* The exchange codes and names of the Top 8 exchanges are listed below.
     * The first 5 exchanges are the Top 5 Maker-Taker exchanges.
     *
     * 'N': NYSE
     * 'P': NYSE Arca
     * 'T': Nasdaq (Tape A and B). This also incorporates volume formerly under code 'Q' (Nasdaq Tape C).
     * 'Z': Bats BZX
     * 'K': Bats EDGX
     * 'B': Nasdaq (BX)
     * 'J': Bats EDGA
     * 'Y': Bats BYX
     */
    f_Top_8 = ex in ('N','P','T','Z','K','B','J','Y');
    f_Main_Maker = ex in ('N','P','T','Z','K');

    symbol = trim(sym_root) || trim(sym_suffix);

    /* Sort the depth data by symbol, date, interval, and exchange */
    proc sort;
      by symbol sym_root sym_suffix date interval ex;
  run;

  /*
   * INPUT: Volume_&YYYY, date-symbol-interval-exchange level volume dataset
   *
   * Loads the volume dataset.
   * For every observation, we assign flags if the exchange is in the Top 8 exchanges
   * or if additionally the exchange is one of the top 5 maker-taker exchanges.
   *
   * OUTPUT: volume, date-symbol-interval-exchange level volume dataset with sample indicators
   */
  data volume;
    set tempdir.volume_&YYYY.;
    format date yymmdd10.;


    /* The exchange codes and names of the Top 8 exchanges are listed below.
     * The first 5 exchanges are the Top 5 Maker-Taker exchanges.
     *
     * 'N': NYSE
     * 'P': NYSE Arca
     * 'T': Nasdaq (Tape A and B). This also incorporates volume formerly under code 'Q' (Nasdaq Tape C).
     * 'Z': Bats BZX
     * 'K': Bats EDGX
     * 'B': Nasdaq (BX)
     * 'J': Bats EDGA
     * 'Y': Bats BYX
     */
    f_Top_8 = ex in ('N','P','T','Z','K','B','J','Y');
    f_Main_Maker = ex in ('N','P','T','Z','K');

    symbol = trim(sym_root) || trim(sym_suffix);

    /* Sort the volume data by symbol, date, interval, and exchange */
    proc sort;
      by symbol sym_root sym_suffix date interval ex;
  run;

  data volume_top5;
  	set volume;
  	where f_Main_Maker = 1;
  run;

  /*
   * INPUTA: depth, date-symbol-interval-exchange level depth dataset
   * INPUTB: volume, date-symbol-interval-exchange level volume dataset
   *
   * Merges the volume dataset onto the depth dataset.
   *
   * Note that due to how we calculate depth, the depth dataset has a record
   * for each exchange for any date-symbol-interval combination.
   * For the volume dataset, if a exchange does not experience any trading volume
   * for a given date-symbol-interval combination, it does not appear.
   *
   * When we merge volume onto depth, if a particular date-symbol-interval-exchange combination
   * did not experience any trading, the volume for that row will be null.
   * We change these nulls to 0.
   *
   * OUTPUT: Depth_Volume, date-symbol-interval-exchange level volume and depth dataset
   */
  data Depth_Volume;
    merge depth_top5(in = a) volume_top5(in = b);
    by symbol sym_root sym_suffix date interval ex;

    /* If volume or depth are null, then no volume/depth occurred for this
     * symbol-date-exchange combination.
     */
    if volume_s = . then volume_s = 0;
    if volume_d = . then volume_d = 0;

    if avgDepth = . then avgDepth = 0;

    /*
     * If volume and depth are both 0 for an observation, we set these values to null.
     * We ignore these observations in the depth-volume figures and regressions.
     *
     * These observations correspond with observations of NYSE for non-NYSE symbols
     * Or intervals where a symbol-exchange pair experiences no volume or depth.
     */
    if (volume_s = 0 and avgDepth = 0) then do;
      volume_s = .;
      avgDepth = .;
    end;

    keep symbol sym_root sym_suffix date interval ex volume_s avgDepth f_:;
  run;

  proc sql;
    create table Depth_Volume_Raw_Shares
    as select
    a.date, a.symbol, a.sym_root, a.sym_suffix, a.interval, a.ex,
    a.f_main_maker,
    a.avgDepth,
    a.volume_s,
    case when ~a.f_main_maker then . else a.avgDepth / sum(a.avgDepth * a.f_main_maker) end as depth_share_main_maker,
    case when ~a.f_main_maker then . else a.volume_s / sum(a.volume_s * a.f_main_maker) end as volume_share_main_maker
    from Depth_Volume as a
    group by date, interval, symbol, sym_root, sym_suffix;
  quit;

  data Depth_Volume_Shares_Flags;
    merge Depth_Volume_Raw_Shares(in = a) symbol_sample(in = b);
    by sym_root sym_suffix;

  run;

  /* Export to the user directory */
  data tempdir.Depth_Volume_Shares_Sample_&YYYY.;
    set Depth_Volume_Shares_Flags;
    where f_sample = 1;
  run;

  /* Export to CSV */
  proc export data=tempdir.Depth_Volume_Shares_Sample_&YYYY.
    outfile="&user_path./Depth_Volume_Shares_Sample_&YYYY..csv"
    dbms=csv
    replace;
  run;

  /* Export to CSV */
  proc export data = Depth_Volume_Shares_Flags
    outfile="&user_path./Depth_Volume_Raw_&YYYY..csv"
    dbms=csv
    replace;
  run;

%Mend;

/*
 *********************************************************************************
 * ______________________________________Run______________________________________
 *********************************************************************************
 */

%Combine_Depth_Volume(&year_input.);
